{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from os import path\n",
    "from PIL import Image\n",
    "from wordcloud import WordCloud, ImageColorGenerator\n",
    "import jieba\n",
    "import matplotlib\n",
    "matplotlib.use('TkAgg')\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import datawash"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 遍历所有用户，提取点赞，感谢，被关注，被收藏等数据\n",
    "keys = ['voteupCount','thankedCount','followerCount','favoritedCount','answerCount','articlesCount','questionCount']\n",
    "dicts = [dict() for i in range(len(keys))]\n",
    "jsons = datawash.datajsons()\n",
    "for user in jsons:\n",
    "    for i in range(len(keys)):\n",
    "        if not user['name'] in dicts[i]:\n",
    "            try:\n",
    "                dicts[i][user['name']] = user[keys[i]]\n",
    "            except:\n",
    "                pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 字体路径\n",
    "font = path.join(path.abspath('.'), 'fonts','fangzhengqingkebenyuesongjianti.TTF')\n",
    "mask = np.array(Image.open(path.join('image', \"mask1.png\")))\n",
    "image_colors = ImageColorGenerator(mask)\n",
    "keys = ['voteupCount','thankedCount','followerCount','favoritedCount','answerCount','articlesCount','questionCount']\n",
    "\n",
    "for i in range(len(dicts)):\n",
    "    # 设置字体，尺寸，生成词云\n",
    "    wc = WordCloud(font_path=font,background_color='white',max_font_size=250,mask=mask).generate_from_frequencies(dicts[i])\n",
    "    \n",
    "    # 绘图\n",
    "    fig,ax = plt.subplots(1,1)\n",
    "    ax.imshow(wc, interpolation=\"bilinear\")\n",
    "    ax.axis(\"off\")\n",
    "    # 存储\n",
    "    wc.to_file(path.join('image',keys[i]+'.png'))\n",
    "    fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 遍历所有用户，提取学校，专业，公司，职位，行业等数据\n",
    "dicts2 = [dict() for i in range(5)]\n",
    "jsons = datawash.datajsons()\n",
    "for user in jsons:\n",
    "    try:\n",
    "        if not user['educations'][0]['school']['name'] in dicts2[0]:\n",
    "            dicts2[0][user['educations'][0]['school']['name']] = 1\n",
    "        else:\n",
    "            dicts2[0][user['educations'][0]['school']['name']] += 1\n",
    "    except:\n",
    "        pass\n",
    "    \n",
    "    try:\n",
    "        if not user['educations'][0]['major']['name'] in dicts2[1]:\n",
    "            dicts2[1][user['educations'][0]['major']['name']] = 1\n",
    "        else:\n",
    "            dicts2[1][user['educations'][0]['major']['name']] += 1\n",
    "    except:\n",
    "        pass\n",
    "    \n",
    "    try:\n",
    "        if not user['employments'][0]['company']['name'] in dicts2[2]:\n",
    "            dicts2[2][user['employments'][0]['company']['name']] = 1\n",
    "        else:\n",
    "            dicts2[2][user['employments'][0]['company']['name']] += 1\n",
    "    except:\n",
    "        pass\n",
    "    \n",
    "    try:\n",
    "        if not user['employments'][0]['job']['name'] in dicts2[3]:\n",
    "            dicts2[3][user['employments'][0]['job']['name']] = 1\n",
    "        else:\n",
    "            dicts2[3][user['employments'][0]['job']['name']] += 1\n",
    "    except:\n",
    "        pass\n",
    "    \n",
    "    try:\n",
    "        if not user['business']['name'] in dicts2[4]:\n",
    "            dicts2[4][user['business']['name']] = 1\n",
    "        else:\n",
    "            dicts2[4][user['business']['name']] += 1\n",
    "    except:\n",
    "        pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 字体路径\n",
    "font = path.join(path.abspath('.'), 'fonts','fangzhengqingkebenyuesongjianti.TTF')\n",
    "mask = np.array(Image.open(path.join('image', \"mask2.png\")))\n",
    "image_colors = ImageColorGenerator(mask)\n",
    "keys = ['school','major','company','job','business']\n",
    "\n",
    "for i in range(len(dicts2)):\n",
    "    # 设置字体，尺寸，生成词云\n",
    "    wc = WordCloud(font_path=font,background_color='white',max_font_size=200,mask=mask).generate_from_frequencies(dicts2[i])\n",
    "    \n",
    "    # 绘图\n",
    "    fig,ax = plt.subplots(1,1)\n",
    "    ax.imshow(wc, interpolation=\"bilinear\")\n",
    "    ax.axis(\"off\")\n",
    "    # 存储\n",
    "    wc.to_file(path.join('image',keys[i]+'.png'))\n",
    "    fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# # 遍历所有用户，提取用户名，签名，个人描述\n",
    "# keys = ['name','headline','description']\n",
    "# lists = [list() for i in range(len(keys))]\n",
    "# jsons = datawash.datajsons()\n",
    "# for user in jsons:\n",
    "#     for i in range(len(keys)):\n",
    "#         try:\n",
    "#             lists[i].append(user[keys[i]])\n",
    "#         except:\n",
    "#             pass\n",
    "\n",
    "# dicts3 = [dict() for i in range(len(keys))]\n",
    "# for i in range(len(lists)):\n",
    "#     for j in lists[i]:\n",
    "#         seg_list = jieba.cut(j, cut_all=False)\n",
    "#         for k in seg_list:\n",
    "#             if not k in dicts3[i]:\n",
    "#                 dicts3[i][k] = 1\n",
    "#             else:\n",
    "#                 dicts3[i][k] += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# # 字体路径\n",
    "# font = path.join(path.abspath('.'), 'fonts','fangzhengqingkebenyuesongjianti.TTF')\n",
    "# mask = np.array(Image.open(path.join('image', \"mask2.png\")))\n",
    "# image_colors = ImageColorGenerator(mask)\n",
    "# keys = ['name','headline','description']\n",
    "\n",
    "# for i in range(len(dicts3)):\n",
    "#     # 设置字体，尺寸，生成词云\n",
    "#     wc = WordCloud(font_path=font,background_color='white',mask=mask).generate_from_frequencies(dicts3[i])\n",
    "    \n",
    "#     # 绘图\n",
    "#     fig,ax = plt.subplots(1,1)\n",
    "#     ax.imshow(wc.recolor(color_func=image_colors), interpolation=\"bilinear\")\n",
    "#     ax.axis(\"off\")\n",
    "#     # 存储\n",
    "#     wc.to_file(path.join('image',keys[i]+'.png'))\n",
    "#     fig.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
